[~/ENSIIE/INSPIRATION/IMB/mpi-benchmarks/src]                                                                                                                                                                                                
--|| curie71 | main | 1000 ||--> ccc_mprun -N 2 -n 32 -p standard IMB-NBC -npmin 32                                                                                                                                                          
#------------------------------------------------------------                                                                                                                                                                                
#    Intel (R) MPI Benchmarks 2018, MPI-NBC part                                                                                                                                                                                             
#------------------------------------------------------------                                                                                                                                                                                
# Date                  : Thu Nov 30 15:42:13 2017                                                                                                                                                                                           
# Machine               : x86_64                                                                                                                                                                                                             
# System                : Linux                                                                                                                                                                                                              
# Release               : 2.6.32-696.13.2.el6.Bull.128.x86_64                                                                                                                                                                                
# Version               : #1 SMP Tue Oct 10 01:42:23 CEST 2017                                                                                                                                                                               
# MPI Version           : 3.1                                                                                                                                                                                                                
# MPI Thread Environment:                                                                                                                                                                                                                    


# Calling sequence was: 

# IMB-NBC -npmin 32

# Minimum message length in bytes:   0
# Maximum message length in bytes:   4194304
#                                           
# MPI_Datatype                   :   MPI_BYTE 
# MPI_Datatype for reductions    :   MPI_FLOAT
# MPI_Op                         :   MPI_SUM  
#                                             
#                                             

# List of Benchmarks to run:

# Ibcast
# Iallgather
# Iallgatherv
# Igather    
# Igatherv   
# Iscatter   
# Iscatterv  
# Ialltoall  
# Ialltoallv 
# Ireduce    
# Ireduce_scatter
# Iallreduce     
# Ibarrier       

#-----------------------------------------------------------------------------
# Benchmarking Ibcast                                                         
# #processes = 32                                                             
#-----------------------------------------------------------------------------
       #bytes #repetitions t_ovrl[usec] t_pure[usec]  t_CPU[usec]   overlap[%]
            0         1000        18.02        14.13        14.03        71.80
            1         1000        18.63        14.54        14.47        71.40
            2         1000        18.57        14.60        14.56        72.54
            4         1000        18.53        14.53        14.47        72.08
            8         1000        18.47        14.47        14.38        71.77
           16         1000        18.59        14.57        14.46        71.68
           32         1000        18.80        14.70        14.65        71.79
           64         1000        19.07        15.02        14.92        72.35
          128         1000        25.90        22.38        22.31        83.96
          256         1000        26.88        23.09        23.02        83.32
          512         1000        28.50        24.69        24.63        84.34
         1024         1000        33.69        29.83        29.79        86.95
         2048         1000        45.89        41.15        41.10        88.36
         4096         1000        62.14        53.47        53.43        83.70
         8192         1000        82.87        71.20        71.14        83.52
        16384         1000       841.17       564.82       566.51        51.22
        32768         1000       876.84       571.27       573.58        46.73
        65536          640       986.60       618.62       620.01        40.65
       131072          320      1388.69      1025.41      1027.41        64.64
       262144          160      2684.26      1843.80      1848.44        54.53
       524288           80      5115.05      3301.32      3303.40        45.09
      1048576           40      9034.59      6395.32      6398.83        58.75
      2097152           20     20465.60     12385.98     12405.69        34.87
      4194304           10     40054.19     25350.76     25355.29        42.01

#-----------------------------------------------------------------------------
# Benchmarking Iallgather                                                     
# #processes = 32                                                             
#-----------------------------------------------------------------------------
       #bytes #repetitions t_ovrl[usec] t_pure[usec]  t_CPU[usec]   overlap[%]
            0         1000       252.87       127.81       127.93         2.24
            1         1000       255.11       128.84       128.93         2.06
            2         1000       255.07       128.65       128.73         1.79
            4         1000       255.21       128.82       128.91         1.96
            8         1000       255.61       129.13       129.19         2.10
           16         1000       255.72       128.98       129.08         1.81
           32         1000       256.22       129.32       129.45         1.97
           64         1000       271.09       137.27       137.38         2.59
          128         1000       149.64       122.21       122.30        77.57
          256         1000       164.04       135.37       135.49        78.84
          512         1000       195.32       160.46       161.00        78.35
         1024         1000       251.82       205.24       205.53        77.34
         2048         1000       433.40       371.99       372.57        83.52
         4096         1000       665.71       570.79       572.06        83.41
         8192         1000      1057.57       926.92       928.50        85.93
        16384          788     24553.94     12652.65     12666.49         6.04
        32768          788     23874.84     12570.15     12585.60        10.18
        65536          640     24106.70     11794.52     11801.01         0.00
       131072          320     37822.11     19756.12     19769.78         8.62
       262144          160     65608.10     33655.76     33693.76         5.17
       524288           80    123703.68     61019.70     61067.28         0.00
      1048576           40    261121.45    129263.34    129350.41         0.00
      2097152           20    606344.24    302890.56    302907.65         0.00
      4194304 time-out.; Time limit (secs_per_sample * msg_sizes_list_len) is over; use "-time X" or SECS_PER_SAMPLE=X (IMB_settings.h) to increase time limit.

#-----------------------------------------------------------------------------
# Benchmarking Iallgatherv                                                    
# #processes = 32                                                             
#-----------------------------------------------------------------------------
       #bytes #repetitions t_ovrl[usec] t_pure[usec]  t_CPU[usec]   overlap[%]
            0         1000       261.05       131.90       132.02         2.17
            1         1000       263.68       133.34       133.41         2.30
            2         1000       266.04       133.02       133.35         0.24
            4         1000       263.67       133.14       133.27         2.05
            8         1000       263.89       133.27       133.35         2.05
           16         1000       264.74       133.80       133.89         2.21
           32         1000       265.42       133.89       133.99         1.84
           64         1000       279.33       141.50       141.56         2.63
          128         1000       155.65       124.66       125.08        75.23
          256         1000       169.69       138.74       138.87        77.71
          512         1000       199.06       163.48       163.70        78.26
         1024         1000       260.69       212.39       212.63        77.28
         2048         1000       455.18       392.35       392.89        84.01
         4096         1000       678.74       587.46       588.33        84.48
         8192         1000      1053.31       921.48       923.35        85.72
        16384          841     22609.23     11824.07     11825.38         8.80
        32768          827     22596.19     12048.51     12046.69        12.44
        65536          640     24331.26     11934.41     11962.98         0.00
       131072          320     37923.71     19034.46     19118.44         1.20
       262144          160     66774.97     33435.48     33444.94         0.32
       524288           80    130375.80     62748.51     62762.88         0.00
      1048576           40    260549.19    125506.56    125544.76         0.00
      2097152           20    584962.21    279535.26    279658.01         0.00
      4194304 time-out.; Time limit (secs_per_sample * msg_sizes_list_len) is over; use "-time X" or SECS_PER_SAMPLE=X (IMB_settings.h) to increase time limit.

#-----------------------------------------------------------------------------
# Benchmarking Igather                                                        
# #processes = 32                                                             
#-----------------------------------------------------------------------------
       #bytes #repetitions t_ovrl[usec] t_pure[usec]  t_CPU[usec]   overlap[%]
            0         1000        22.45        11.45        11.29         2.50
            1         1000        23.92        12.06        11.89         0.24
            2         1000        24.21        12.30        12.19         2.24
            4         1000        23.91        12.01        11.83         0.00
            8         1000        24.08        12.16        12.01         0.73
           16         1000        24.00        11.98        11.81         0.00
           32         1000        24.31        12.25        12.08         0.18
           64         1000        24.34        12.12        12.00         0.00
          128         1000        24.36        12.20        12.01         0.00
          256         1000        26.61        13.42        13.26         0.50
          512         1000        28.68        14.48        14.34         1.01
         1024         1000        33.91        17.15        17.02         1.57
         2048         1000        43.18        21.89        21.79         2.29
         4096         1000        83.86        41.98        41.76         0.00
         8192         1000       135.57        73.22        72.82        14.31
        16384         1000      2671.28      1359.12      1370.81         4.28
        32768         1000      2747.23      1416.86      1428.15         6.85
        65536          640      2889.03      1533.35      1544.92        12.25
       131072          320      3298.28      1740.97      1752.44        11.13
       262144          160      4292.82      2193.53      2193.78         4.31
       524288           80      7421.01      3760.41      3759.61         2.63
      1048576           40     13998.42      7093.66      7091.26         2.63
      2097152           20     43059.10     24616.38     24451.05        24.41
      4194304           10    266814.87    133456.94    132502.64         0.00

#-----------------------------------------------------------------------------
# Benchmarking Igatherv                                                       
# #processes = 32                                                             
#-----------------------------------------------------------------------------
       #bytes #repetitions t_ovrl[usec] t_pure[usec]  t_CPU[usec]   overlap[%]
            0         1000        21.53        10.89        10.74         0.93
            1         1000        23.35        11.77        11.64         0.58
            2         1000        23.34        11.79        11.64         0.77
            4         1000        23.50        11.91        11.72         1.08
            8         1000        23.43        11.80        11.64         0.08
           16         1000        23.62        12.00        11.81         1.57
           32         1000        23.52        11.82        11.64         0.00
           64         1000        24.13        12.25        12.08         1.61
          128         1000        24.89        12.61        12.44         1.33
          256         1000        26.04        13.13        12.99         0.62
          512         1000        28.89        14.82        14.69         4.15
         1024         1000        33.00        16.62        16.47         0.59
         2048         1000        43.11        22.14        21.91         4.25
         4096         1000        84.34        42.40        42.08         0.33
         8192         1000       126.18        63.52        63.12         0.73
        16384         1000      2683.44      1380.77      1392.84         6.47
        32768         1000      2763.23      1438.11      1449.39         8.57
        65536          640      2913.05      1546.88      1557.20        12.27
       131072          320      3310.30      1754.47      1764.43        11.82
       262144          160      4240.17      2194.21      2203.53         7.15
       524288           80      7427.24      3716.25      3715.69         0.13
      1048576           40     13658.00      6958.13      6961.70         3.76
      2097152           20     37145.35     18638.99     18510.48         0.02
      4194304           10    266798.40    133482.29    132508.29         0.00

#-----------------------------------------------------------------------------
# Benchmarking Iscatter                                                       
# #processes = 32                                                             
#-----------------------------------------------------------------------------
       #bytes #repetitions t_ovrl[usec] t_pure[usec]  t_CPU[usec]   overlap[%]
            0         1000       147.56        73.98        73.44         0.00
            1         1000       150.24        75.34        74.85         0.00
            2         1000       149.97        75.21        74.68         0.00
            4         1000       150.04        75.25        74.77         0.00
            8         1000       149.98        75.21        74.67         0.00
           16         1000       150.44        75.43        74.94         0.00
           32         1000       150.39        75.41        74.85         0.00
           64         1000       151.56        76.08        75.60         0.16
          128         1000        26.29        13.03        12.90         0.00
          256         1000        29.33        14.68        14.49         0.00
          512         1000        35.02        17.47        17.28         0.00
         1024         1000        47.01        23.29        23.10         0.00
         2048         1000        60.37        29.79        29.63         0.00
         4096         1000        87.25        43.94        43.67         0.81
         8192         1000       122.49        62.18        61.85         2.46
        16384         1000      2502.85      1253.84      1245.37         0.00
        32768         1000      2651.85      1328.12      1320.09         0.00
        65536          640      2952.83      1480.30      1470.14         0.00
       131072          320      3626.79      1818.32      1805.59         0.00
       262144          160      5556.18      2790.22      2770.01         0.15
       524288           80      9083.64      4555.03      4524.01         0.00
      1048576           40     15109.05      7570.53      7523.30         0.00
      2097152           20     23981.13     12094.82     12097.44         1.75
      4194304           10    195945.28    100067.81     99361.68         3.48

#-----------------------------------------------------------------------------
# Benchmarking Iscatterv                                                      
# #processes = 32                                                             
#-----------------------------------------------------------------------------
       #bytes #repetitions t_ovrl[usec] t_pure[usec]  t_CPU[usec]   overlap[%]
            0         1000       150.60        75.45        74.95         0.00
            1         1000       153.08        76.61        76.12         0.00
            2         1000       159.54        83.21        82.64         7.58
            4         1000       153.01        76.64        76.13         0.00
            8         1000       153.04        76.61        76.12         0.00
           16         1000       153.30        76.78        76.28         0.00
           32         1000       153.56        76.92        76.39         0.00
           64         1000       154.86        77.66        77.09         0.00
          128         1000        27.45        13.57        13.43         0.00
          256         1000        30.54        15.18        15.04         0.00
          512         1000        36.39        18.13        17.99         0.00
         1024         1000        48.18        23.81        23.63         0.00
         2048         1000        61.77        30.63        30.43         0.00
         4096         1000        89.32        45.10        44.83         1.36
         8192         1000       123.16        62.37        62.04         1.99
        16384         1000      2537.29      1257.12      1250.53         0.00
        32768         1000      2649.92      1326.27      1317.35         0.00
        65536          640      2945.23      1472.25      1462.21         0.00
       131072          320      3636.64      1822.83      1810.07         0.00
       262144          160      5722.43      2868.17      2847.39         0.00
       524288           80      9172.28      4603.70      4570.47         0.04
      1048576           40     15089.25      7551.76      7504.89         0.00
      2097152           20     23976.14     12104.09     12105.14         1.93
      4194304           10    187993.76     89100.23     88472.33         0.00

#-----------------------------------------------------------------------------
# Benchmarking Ialltoall                                                      
# #processes = 32                                                             
#-----------------------------------------------------------------------------
       #bytes #repetitions t_ovrl[usec] t_pure[usec]  t_CPU[usec]   overlap[%]
            0         1000       262.17       131.24       131.58         0.49
            1         1000       262.54       132.39       132.48         1.76
            2         1000       262.71       132.71       132.80         2.10
            4         1000       262.95       132.54       132.64         1.68
            8         1000       262.84       132.49       132.55         1.66
           16         1000       263.02       132.71       132.83         1.89
           32         1000       267.11       133.69       134.06         0.48
           64         1000       277.79       140.33       140.42         2.10
          128         1000       151.43       123.72       123.82        77.62
          256         1000       166.71       135.16       135.26        76.67
          512         1000       195.89       160.78       160.99        78.19
         1024         1000       256.98       208.32       208.62        76.67
         2048         1000       479.86       415.40       415.96        84.50
         4096         1000       705.59       605.15       606.10        83.43
         8192         1000      1087.37       946.92       948.42        85.19
        16384          483     40227.43     20750.46     20762.20         6.19
        32768          483     38661.12     20146.65     20158.08         8.15
        65536          482     39871.94     20178.32     20193.67         2.48
       131072          320     49199.36     25532.55     25552.62         7.38
       262144          160     79907.33     40954.50     40984.56         4.96
       524288           80    147702.35     74857.32     74939.74         2.80
      1048576           40    314979.98    167289.44    167357.53        11.75
      2097152 time-out.; Time limit (secs_per_sample * msg_sizes_list_len) is over; use "-time X" or SECS_PER_SAMPLE=X (IMB_settings.h) to increase time limit.

#-----------------------------------------------------------------------------
# Benchmarking Ialltoallv                                                     
# #processes = 32                                                             
#-----------------------------------------------------------------------------
       #bytes #repetitions t_ovrl[usec] t_pure[usec]  t_CPU[usec]   overlap[%]
            0         1000         2.65         1.30         1.19         0.00
            1         1000       264.29       133.48       133.58         2.08
            2         1000       264.79       134.01       134.10         2.47
            4         1000       266.49       137.51       137.65         6.30
            8         1000       264.86       133.93       134.01         2.31
           16         1000       265.58       134.20       134.26         2.14
           32         1000       266.71       134.86       134.99         2.33
           64         1000       279.86       141.67       141.77         2.53
          128         1000       153.28       125.37       125.43        77.74
          256         1000       168.62       136.07       136.54        76.16
          512         1000       196.97       162.45       162.69        78.78
         1024         1000       256.18       207.68       207.94        76.67
         2048         1000       474.97       411.85       412.46        84.70
         4096         1000       699.03       600.72       601.63        83.66
         8192         1000      1084.21       943.29       945.27        85.09
        16384          485     39926.58     20725.03     20752.26         7.47
        32768          485     38638.09     20465.44     20475.99        11.25
        65536          485     40106.70     20177.58     20189.78         1.29
       131072          320     49281.99     25339.85     25356.44         5.58
       262144          160     79152.77     40997.64     41009.76         6.96
       524288           80    145277.76     74351.04     74376.44         4.64
      1048576           40    314084.74    172242.34    172329.32        17.69
      2097152 time-out.; Time limit (secs_per_sample * msg_sizes_list_len) is over; use "-time X" or SECS_PER_SAMPLE=X (IMB_settings.h) to increase time limit.

#-----------------------------------------------------------------------------
# Benchmarking Ireduce                                                        
# #processes = 32                                                             
#-----------------------------------------------------------------------------
       #bytes #repetitions t_ovrl[usec] t_pure[usec]  t_CPU[usec]   overlap[%]
            0         1000         0.58         0.19         0.23         0.00
            4         1000        16.34        12.61        12.45        69.15
            8         1000        16.51        12.72        12.54        68.75
           16         1000        16.40        12.71        12.55        69.74
           32         1000        16.33        12.64        12.46        69.39
           64         1000        16.74        12.91        12.71        68.76
          128         1000        18.67        15.42        15.23        77.64
          256         1000        19.64        16.20        16.02        77.68
          512         1000        22.26        18.02        17.81        75.29
         1024         1000        25.16        20.51        20.32        76.38
         2048         1000        30.40        16.05        15.92         9.77
         4096         1000        46.18        22.72        22.54         0.00
         8192         1000        64.78        31.30        31.11         0.00
        16384         1000       425.35       179.87       179.01         0.00
        32768         1000       496.79       217.36       216.37         0.00
        65536          640       624.11       279.91       278.57         0.00
       131072          320       864.34       400.82       399.15         0.00
       262144          160      1356.62       649.15       646.04         0.00
       524288           80      2373.17      1137.99      1132.13         0.00
      1048576           40      4527.36      2160.63      2149.03         0.00
      2097152           20      8968.95      4202.46      4187.70         0.00
      4194304           10     18557.11      9181.55      9149.58         0.00

#-----------------------------------------------------------------------------
# Benchmarking Ireduce_scatter                                                
# #processes = 32                                                             
#-----------------------------------------------------------------------------
       #bytes #repetitions t_ovrl[usec] t_pure[usec]  t_CPU[usec]   overlap[%]
            0            1         0.811168671474213.16         0.20       100.00
            4            1        82.711168671474668.29         0.20       100.00
            8            1        79.691168637120948.35         0.18       100.00
           16            1        79.231168637121446.26         0.18       100.00
           32            1        80.711168671476173.58         0.19       100.00
           64            1        84.401168671476670.33         0.20       100.00
          128            1        82.311168637122951.52         0.19       100.00
          256            1        89.931168671477685.47         0.19       100.00
          512            1        87.411168671478199.62         0.20       100.00
         1024            1        91.441168671478721.02         0.21       100.00
         2048            1        91.961168671479263.12         0.19       100.00
         4096            1        55.401168637125487.51         0.19       100.00
         8192            1        65.641168637125929.82         0.23       100.00
        16384            1       274.681168637127074.28         0.19       100.00
        32768            1       314.311168637128593.19         0.19       100.00
        65536            1       381.211168637130371.10         0.21       100.00
       131072            1       526.001168637132705.47         0.20       100.00
       262144            1       831.991168637136172.96         0.20       100.00
       524288            1      2525.971168671499588.11         0.21       100.00
      1048576            1      3744.481168671514848.53         0.24       100.00
      2097152            1      6279.201168671540446.57         0.23       100.00
      4194304            1     12437.361168671588158.16         0.23       100.00

#-----------------------------------------------------------------------------
# Benchmarking Iallreduce
# #processes = 32
#-----------------------------------------------------------------------------
       #bytes #repetitions t_ovrl[usec] t_pure[usec]  t_CPU[usec]   overlap[%]
            0         1000         0.57         0.20         0.23         0.00
            4         1000        34.02        18.28        18.21        13.53
            8         1000        34.20        18.37        18.30        13.44
           16         1000        34.50        18.52        18.49        13.57
           32         1000        34.95        18.83        18.74        13.89
           64         1000        35.76        19.13        19.02        12.50
          128         1000        55.41        31.40        31.31        23.25
          256         1000        57.95        32.49        32.47        21.58
          512         1000        62.21        34.63        34.61        20.30
         1024         1000        73.48        40.53        40.49        18.62
         2048         1000       100.98        53.00        52.95         9.38
         4096         1000       140.97        71.39        71.42         2.58
         8192         1000       199.76       100.93       100.93         2.09
        16384         1000      1443.36       726.32       727.64         1.46
        32768         1000      1555.49       782.38       783.63         1.34
        65536          640       356.88       180.26       179.56         1.63
       131072          320       612.35       311.64       311.88         3.58
       262144          160       963.05       480.98       481.70         0.00
       524288           80     19166.54      9578.30      9581.93         0.00
      1048576           40     20617.79     10304.68     10308.31         0.00
      2097152           20     23420.30     11706.60     11710.33         0.00
      4194304           10     28740.24     14388.00     14393.75         0.29

#----------------------------------------------------------------
# Benchmarking Ibarrier
# #processes = 32
#----------------------------------------------------------------
 #repetitions t_ovrl[usec] t_pure[usec]  t_CPU[usec]   overlap[%]
         1000        54.00        30.78        30.72        24.37


# All processes entering MPI_Finalize

